from lxml import etree

# Absolute (full) path
# /html/body/div[@class="content"]/div/div[@id="container"]/div/div[@class="listLeft"]/div[@class="bookList"]/ul/li
# Relative path
# Usable when 1) the tag has an id attribute, which is globally unique, or 2) its class attribute is also unique
# //div[@class="bookList"]/ul/li

# Douban Books, "New Book Express" (新书速递) section
# //div[@class="section books-express"]/div[@class="bd"]/div[1]/div/ul[2]/li
# //div[@class="section books-express"]/div[@class="bd"]/div[1]/div/ul[2]/li[last()]  selects the last li tag

'''
例子1：
'''
# html1 = """
# <!DOCTYPE html>
# <html>
#  <head lang='en'>
#     <meta charest='utf-8'>
#     <title></title>
#  </head>
#  <body>
#     <div id="test-1">shujia1</div>
#     <div id="test-2">shujia2</div>
#     <div id="testdefault">shujia3</div>
#  </body>
# </html>
# """
#
# # 将符合html格式的字符串转成可以编写xpath语法的格式
# info1 = etree.HTML(html1)
# # //div[@id="test-2"]/text()
# res1 = info1.xpath('//div/text()')
# print(res1, type(res1))

# Example 2: gather all the text of a document, however deeply it is nested,
# and print it with the layout whitespace removed.

html2 = """
<!DOCTYPE html>
<html>
 <head lang='en'>
    <meta charest='utf-8'>
    <title></title>
 </head>
 <body>
    <div id="test3">
    我左青龙,
        <span id='tiger'>
            右白虎
            <ul>上朱雀,
                <li>下玄武,</li>
            </ul>
        </span>
        龙头在胸口
    </div>
 </body>
</html>
"""

# Parse the markup into an element tree that XPath expressions can query.
info2 = etree.HTML(html2)

# string(.) yields the string value of the context node (here the parsed
# root), i.e. the text of all of its descendants joined together; the result
# is coerced to str before cleaning.
res1 = str(info2.xpath('string(.)'))

# Delete newlines, spaces and tabs in a single pass; the deletion table
# lists exactly those three characters.
res1 = res1.translate(str.maketrans("", "", "\n \t"))
print(res1)

